{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "199c565b-7da9-43b9-a92e-a7de232caf22",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/david/anaconda3/envs/peft/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n"
     ]
    }
   ],
   "source": [
    "from awq import AutoAWQForCausalLM\n",
    "from transformers import AutoTokenizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "75bd7575-712b-4c5b-9a5a-b9cc80c73189",
   "metadata": {},
   "outputs": [],
   "source": [
    "model_name_or_path = \"facebook/opt-6.7b\"\n",
    "quan_model_dir = \"../models/opt-6.7b-awq\"\n",
    "\n",
    "# AWQ settings: 4-bit weights, group size 128, GEMM kernel version.\n",
    "quant_config = {\n",
    "    \"zero_point\": True,  # explicit (AutoAWQ default); keeps this dict the single source of truth\n",
    "    \"q_group_size\": 128,\n",
    "    \"w_bit\": 4,\n",
    "    \"version\": \"GEMM\"\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "174dbd8f-8670-4cde-a7c0-7d0bf2674bd8",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Fetching 12 files: 100%|███████████████████████████████████████████████████████████████████████████| 12/12 [00:00<00:00, 41154.25it/s]\n",
      "Loading checkpoint shards: 100%|████████████████████████████████████████████████████████████████████████| 2/2 [00:49<00:00, 24.50s/it]\n"
     ]
    }
   ],
   "source": [
    "# Load the base FP16 model and its tokenizer.\n",
    "# device_map=\"auto\" lets accelerate place shards across available devices.\n",
    "model = AutoAWQForCausalLM.from_pretrained(\n",
    "    model_name_or_path, device_map=\"auto\", safetensors=False\n",
    ")\n",
    "tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "65a9c867-8b4c-438d-a39b-429590275827",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Repo card metadata block was not found. Setting CardData to empty.\n",
      "Generating validation split: 214670 examples [00:18, 11485.56 examples/s]\n",
      "AWQ:   6%|█████▊                                                                                       | 2/32 [02:21<35:19, 70.64s/it]"
     ]
    }
   ],
   "source": [
    "model.quantize(tokenizer, quant_config=quant_config)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "38268e5f-6bde-4b78-a8cd-af8df9e04f2d",
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import AwqConfig, AutoConfig"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "7a0772a6-649a-43c4-8069-83c554677fb7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Translate the AWQ settings into a transformers-native AwqConfig so the\n",
    "# saved checkpoint advertises its quantization scheme in config.json.\n",
    "quantization_config = AwqConfig(\n",
    "    bits=quant_config[\"w_bit\"],\n",
    "    group_size=quant_config[\"q_group_size\"],\n",
    "    zero_point=quant_config.get(\"zero_point\", True),  # read from quant_config instead of hardcoding\n",
    "    version=quant_config[\"version\"].lower(),\n",
    ").to_dict()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "47dc6e51-e95d-4118-aa79-33d2a98dba9b",
   "metadata": {},
   "outputs": [],
   "source": [
    "model.model.config.quantization_config = quantization_config"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "d6b19e60-e809-4319-9802-4c5a8885bd13",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('../models/opt-6.7b-awq/tokenizer_config.json',\n",
       " '../models/opt-6.7b-awq/special_tokens_map.json',\n",
       " '../models/opt-6.7b-awq/vocab.json',\n",
       " '../models/opt-6.7b-awq/merges.txt',\n",
       " '../models/opt-6.7b-awq/added_tokens.json',\n",
       " '../models/opt-6.7b-awq/tokenizer.json')"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Persist the quantized weights and the tokenizer side by side in quan_model_dir.\n",
    "model.save_quantized(quan_model_dir)\n",
    "tokenizer.save_pretrained(quan_model_dir)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "ea677a77-b5e4-45be-b41b-766f641d09ce",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "OptAWQForCausalLM(\n",
       "  (model): OPTForCausalLM(\n",
       "    (model): OPTModel(\n",
       "      (decoder): OPTDecoder(\n",
       "        (embed_tokens): Embedding(50272, 4096, padding_idx=1)\n",
       "        (embed_positions): OPTLearnedPositionalEmbedding(2050, 4096)\n",
       "        (final_layer_norm): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)\n",
       "        (layers): ModuleList(\n",
       "          (0-31): 32 x OPTDecoderLayer(\n",
       "            (self_attn): OPTSdpaAttention(\n",
       "              (k_proj): Linear(in_features=4096, out_features=4096, bias=True)\n",
       "              (v_proj): Linear(in_features=4096, out_features=4096, bias=True)\n",
       "              (q_proj): Linear(in_features=4096, out_features=4096, bias=True)\n",
       "              (out_proj): Linear(in_features=4096, out_features=4096, bias=True)\n",
       "            )\n",
       "            (activation_fn): ReLU()\n",
       "            (self_attn_layer_norm): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)\n",
       "            (fc1): Linear(in_features=4096, out_features=16384, bias=True)\n",
       "            (fc2): Linear(in_features=16384, out_features=4096, bias=True)\n",
       "            (final_layer_norm): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)\n",
       "          )\n",
       "        )\n",
       "      )\n",
       "    )\n",
       "    (lm_head): Linear(in_features=4096, out_features=50272, bias=False)\n",
       "  )\n",
       ")"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.eval()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "480fe6ea-9bf8-452a-85cd-62f32854f816",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loading checkpoint shards: 100%|████████████████████████████████████████████████████████████████████████| 3/3 [00:02<00:00,  1.44it/s]\n",
      "Some weights of the model checkpoint at ../models/opt-6.7b-awq were not used when initializing OPTForCausalLM: ['model.decoder.layers.0.fc1.weight', 'model.decoder.layers.0.fc2.weight', 'model.decoder.layers.0.self_attn.k_proj.weight', 'model.decoder.layers.0.self_attn.out_proj.weight', 'model.decoder.layers.0.self_attn.q_proj.weight', 'model.decoder.layers.0.self_attn.v_proj.weight', 'model.decoder.layers.1.fc1.weight', 'model.decoder.layers.1.fc2.weight', 'model.decoder.layers.1.self_attn.k_proj.weight', 'model.decoder.layers.1.self_attn.out_proj.weight', 'model.decoder.layers.1.self_attn.q_proj.weight', 'model.decoder.layers.1.self_attn.v_proj.weight', 'model.decoder.layers.10.fc1.weight', 'model.decoder.layers.10.fc2.weight', 'model.decoder.layers.10.self_attn.k_proj.weight', 'model.decoder.layers.10.self_attn.out_proj.weight', 'model.decoder.layers.10.self_attn.q_proj.weight', 'model.decoder.layers.10.self_attn.v_proj.weight', 'model.decoder.layers.11.fc1.weight', 'model.decoder.layers.11.fc2.weight', 'model.decoder.layers.11.self_attn.k_proj.weight', 'model.decoder.layers.11.self_attn.out_proj.weight', 'model.decoder.layers.11.self_attn.q_proj.weight', 'model.decoder.layers.11.self_attn.v_proj.weight', 'model.decoder.layers.12.fc1.weight', 'model.decoder.layers.12.fc2.weight', 'model.decoder.layers.12.self_attn.k_proj.weight', 'model.decoder.layers.12.self_attn.out_proj.weight', 'model.decoder.layers.12.self_attn.q_proj.weight', 'model.decoder.layers.12.self_attn.v_proj.weight', 'model.decoder.layers.13.fc1.weight', 'model.decoder.layers.13.fc2.weight', 'model.decoder.layers.13.self_attn.k_proj.weight', 'model.decoder.layers.13.self_attn.out_proj.weight', 'model.decoder.layers.13.self_attn.q_proj.weight', 'model.decoder.layers.13.self_attn.v_proj.weight', 'model.decoder.layers.14.fc1.weight', 'model.decoder.layers.14.fc2.weight', 'model.decoder.layers.14.self_attn.k_proj.weight', 'model.decoder.layers.14.self_attn.out_proj.weight', 
'model.decoder.layers.14.self_attn.q_proj.weight', 'model.decoder.layers.14.self_attn.v_proj.weight', 'model.decoder.layers.15.fc1.weight', 'model.decoder.layers.15.fc2.weight', 'model.decoder.layers.15.self_attn.k_proj.weight', 'model.decoder.layers.15.self_attn.out_proj.weight', 'model.decoder.layers.15.self_attn.q_proj.weight', 'model.decoder.layers.15.self_attn.v_proj.weight', 'model.decoder.layers.16.fc1.weight', 'model.decoder.layers.16.fc2.weight', 'model.decoder.layers.16.self_attn.k_proj.weight', 'model.decoder.layers.16.self_attn.out_proj.weight', 'model.decoder.layers.16.self_attn.q_proj.weight', 'model.decoder.layers.16.self_attn.v_proj.weight', 'model.decoder.layers.17.fc1.weight', 'model.decoder.layers.17.fc2.weight', 'model.decoder.layers.17.self_attn.k_proj.weight', 'model.decoder.layers.17.self_attn.out_proj.weight', 'model.decoder.layers.17.self_attn.q_proj.weight', 'model.decoder.layers.17.self_attn.v_proj.weight', 'model.decoder.layers.18.fc1.weight', 'model.decoder.layers.18.fc2.weight', 'model.decoder.layers.18.self_attn.k_proj.weight', 'model.decoder.layers.18.self_attn.out_proj.weight', 'model.decoder.layers.18.self_attn.q_proj.weight', 'model.decoder.layers.18.self_attn.v_proj.weight', 'model.decoder.layers.19.fc1.weight', 'model.decoder.layers.19.fc2.weight', 'model.decoder.layers.19.self_attn.k_proj.weight', 'model.decoder.layers.19.self_attn.out_proj.weight', 'model.decoder.layers.19.self_attn.q_proj.weight', 'model.decoder.layers.19.self_attn.v_proj.weight', 'model.decoder.layers.2.fc1.weight', 'model.decoder.layers.2.fc2.weight', 'model.decoder.layers.2.self_attn.k_proj.weight', 'model.decoder.layers.2.self_attn.out_proj.weight', 'model.decoder.layers.2.self_attn.q_proj.weight', 'model.decoder.layers.2.self_attn.v_proj.weight', 'model.decoder.layers.20.fc1.weight', 'model.decoder.layers.20.fc2.weight', 'model.decoder.layers.20.self_attn.k_proj.weight', 'model.decoder.layers.20.self_attn.out_proj.weight', 
'model.decoder.layers.20.self_attn.q_proj.weight', 'model.decoder.layers.20.self_attn.v_proj.weight', 'model.decoder.layers.21.fc1.weight', 'model.decoder.layers.21.fc2.weight', 'model.decoder.layers.21.self_attn.k_proj.weight', 'model.decoder.layers.21.self_attn.out_proj.weight', 'model.decoder.layers.21.self_attn.q_proj.weight', 'model.decoder.layers.21.self_attn.v_proj.weight', 'model.decoder.layers.22.fc1.weight', 'model.decoder.layers.22.fc2.weight', 'model.decoder.layers.22.self_attn.k_proj.weight', 'model.decoder.layers.22.self_attn.out_proj.weight', 'model.decoder.layers.22.self_attn.q_proj.weight', 'model.decoder.layers.22.self_attn.v_proj.weight', 'model.decoder.layers.23.fc1.weight', 'model.decoder.layers.23.fc2.weight', 'model.decoder.layers.23.self_attn.k_proj.weight', 'model.decoder.layers.23.self_attn.out_proj.weight', 'model.decoder.layers.23.self_attn.q_proj.weight', 'model.decoder.layers.23.self_attn.v_proj.weight', 'model.decoder.layers.24.fc1.weight', 'model.decoder.layers.24.fc2.weight', 'model.decoder.layers.24.self_attn.k_proj.weight', 'model.decoder.layers.24.self_attn.out_proj.weight', 'model.decoder.layers.24.self_attn.q_proj.weight', 'model.decoder.layers.24.self_attn.v_proj.weight', 'model.decoder.layers.25.fc1.weight', 'model.decoder.layers.25.fc2.weight', 'model.decoder.layers.25.self_attn.k_proj.weight', 'model.decoder.layers.25.self_attn.out_proj.weight', 'model.decoder.layers.25.self_attn.q_proj.weight', 'model.decoder.layers.25.self_attn.v_proj.weight', 'model.decoder.layers.26.fc1.weight', 'model.decoder.layers.26.fc2.weight', 'model.decoder.layers.26.self_attn.k_proj.weight', 'model.decoder.layers.26.self_attn.out_proj.weight', 'model.decoder.layers.26.self_attn.q_proj.weight', 'model.decoder.layers.26.self_attn.v_proj.weight', 'model.decoder.layers.27.fc1.weight', 'model.decoder.layers.27.fc2.weight', 'model.decoder.layers.27.self_attn.k_proj.weight', 'model.decoder.layers.27.self_attn.out_proj.weight', 
'model.decoder.layers.27.self_attn.q_proj.weight', 'model.decoder.layers.27.self_attn.v_proj.weight', 'model.decoder.layers.28.fc1.weight', 'model.decoder.layers.28.fc2.weight', 'model.decoder.layers.28.self_attn.k_proj.weight', 'model.decoder.layers.28.self_attn.out_proj.weight', 'model.decoder.layers.28.self_attn.q_proj.weight', 'model.decoder.layers.28.self_attn.v_proj.weight', 'model.decoder.layers.29.fc1.weight', 'model.decoder.layers.29.fc2.weight', 'model.decoder.layers.29.self_attn.k_proj.weight', 'model.decoder.layers.29.self_attn.out_proj.weight', 'model.decoder.layers.29.self_attn.q_proj.weight', 'model.decoder.layers.29.self_attn.v_proj.weight', 'model.decoder.layers.3.fc1.weight', 'model.decoder.layers.3.fc2.weight', 'model.decoder.layers.3.self_attn.k_proj.weight', 'model.decoder.layers.3.self_attn.out_proj.weight', 'model.decoder.layers.3.self_attn.q_proj.weight', 'model.decoder.layers.3.self_attn.v_proj.weight', 'model.decoder.layers.30.fc1.weight', 'model.decoder.layers.30.fc2.weight', 'model.decoder.layers.30.self_attn.k_proj.weight', 'model.decoder.layers.30.self_attn.out_proj.weight', 'model.decoder.layers.30.self_attn.q_proj.weight', 'model.decoder.layers.30.self_attn.v_proj.weight', 'model.decoder.layers.31.fc1.weight', 'model.decoder.layers.31.fc2.weight', 'model.decoder.layers.31.self_attn.k_proj.weight', 'model.decoder.layers.31.self_attn.out_proj.weight', 'model.decoder.layers.31.self_attn.q_proj.weight', 'model.decoder.layers.31.self_attn.v_proj.weight', 'model.decoder.layers.4.fc1.weight', 'model.decoder.layers.4.fc2.weight', 'model.decoder.layers.4.self_attn.k_proj.weight', 'model.decoder.layers.4.self_attn.out_proj.weight', 'model.decoder.layers.4.self_attn.q_proj.weight', 'model.decoder.layers.4.self_attn.v_proj.weight', 'model.decoder.layers.5.fc1.weight', 'model.decoder.layers.5.fc2.weight', 'model.decoder.layers.5.self_attn.k_proj.weight', 'model.decoder.layers.5.self_attn.out_proj.weight', 
'model.decoder.layers.5.self_attn.q_proj.weight', 'model.decoder.layers.5.self_attn.v_proj.weight', 'model.decoder.layers.6.fc1.weight', 'model.decoder.layers.6.fc2.weight', 'model.decoder.layers.6.self_attn.k_proj.weight', 'model.decoder.layers.6.self_attn.out_proj.weight', 'model.decoder.layers.6.self_attn.q_proj.weight', 'model.decoder.layers.6.self_attn.v_proj.weight', 'model.decoder.layers.7.fc1.weight', 'model.decoder.layers.7.fc2.weight', 'model.decoder.layers.7.self_attn.k_proj.weight', 'model.decoder.layers.7.self_attn.out_proj.weight', 'model.decoder.layers.7.self_attn.q_proj.weight', 'model.decoder.layers.7.self_attn.v_proj.weight', 'model.decoder.layers.8.fc1.weight', 'model.decoder.layers.8.fc2.weight', 'model.decoder.layers.8.self_attn.k_proj.weight', 'model.decoder.layers.8.self_attn.out_proj.weight', 'model.decoder.layers.8.self_attn.q_proj.weight', 'model.decoder.layers.8.self_attn.v_proj.weight', 'model.decoder.layers.9.fc1.weight', 'model.decoder.layers.9.fc2.weight', 'model.decoder.layers.9.self_attn.k_proj.weight', 'model.decoder.layers.9.self_attn.out_proj.weight', 'model.decoder.layers.9.self_attn.q_proj.weight', 'model.decoder.layers.9.self_attn.v_proj.weight']\n",
      "- This IS expected if you are initializing OPTForCausalLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
      "- This IS NOT expected if you are initializing OPTForCausalLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
      "Some weights of OPTForCausalLM were not initialized from the model checkpoint at ../models/opt-6.7b-awq and are newly initialized: ['model.decoder.layers.0.fc1.qweight', 'model.decoder.layers.0.fc1.qzeros', 'model.decoder.layers.0.fc1.scales', 'model.decoder.layers.0.fc2.qweight', 'model.decoder.layers.0.fc2.qzeros', 'model.decoder.layers.0.fc2.scales', 'model.decoder.layers.0.self_attn.k_proj.qweight', 'model.decoder.layers.0.self_attn.k_proj.qzeros', 'model.decoder.layers.0.self_attn.k_proj.scales', 'model.decoder.layers.0.self_attn.out_proj.qweight', 'model.decoder.layers.0.self_attn.out_proj.qzeros', 'model.decoder.layers.0.self_attn.out_proj.scales', 'model.decoder.layers.0.self_attn.q_proj.qweight', 'model.decoder.layers.0.self_attn.q_proj.qzeros', 'model.decoder.layers.0.self_attn.q_proj.scales', 'model.decoder.layers.0.self_attn.v_proj.qweight', 'model.decoder.layers.0.self_attn.v_proj.qzeros', 'model.decoder.layers.0.self_attn.v_proj.scales', 'model.decoder.layers.1.fc1.qweight', 'model.decoder.layers.1.fc1.qzeros', 'model.decoder.layers.1.fc1.scales', 'model.decoder.layers.1.fc2.qweight', 'model.decoder.layers.1.fc2.qzeros', 'model.decoder.layers.1.fc2.scales', 'model.decoder.layers.1.self_attn.k_proj.qweight', 'model.decoder.layers.1.self_attn.k_proj.qzeros', 'model.decoder.layers.1.self_attn.k_proj.scales', 'model.decoder.layers.1.self_attn.out_proj.qweight', 'model.decoder.layers.1.self_attn.out_proj.qzeros', 'model.decoder.layers.1.self_attn.out_proj.scales', 'model.decoder.layers.1.self_attn.q_proj.qweight', 'model.decoder.layers.1.self_attn.q_proj.qzeros', 'model.decoder.layers.1.self_attn.q_proj.scales', 'model.decoder.layers.1.self_attn.v_proj.qweight', 'model.decoder.layers.1.self_attn.v_proj.qzeros', 'model.decoder.layers.1.self_attn.v_proj.scales', 'model.decoder.layers.10.fc1.qweight', 'model.decoder.layers.10.fc1.qzeros', 'model.decoder.layers.10.fc1.scales', 'model.decoder.layers.10.fc2.qweight', 'model.decoder.layers.10.fc2.qzeros', 
'model.decoder.layers.10.fc2.scales', 'model.decoder.layers.10.self_attn.k_proj.qweight', 'model.decoder.layers.10.self_attn.k_proj.qzeros', 'model.decoder.layers.10.self_attn.k_proj.scales', 'model.decoder.layers.10.self_attn.out_proj.qweight', 'model.decoder.layers.10.self_attn.out_proj.qzeros', 'model.decoder.layers.10.self_attn.out_proj.scales', 'model.decoder.layers.10.self_attn.q_proj.qweight', 'model.decoder.layers.10.self_attn.q_proj.qzeros', 'model.decoder.layers.10.self_attn.q_proj.scales', 'model.decoder.layers.10.self_attn.v_proj.qweight', 'model.decoder.layers.10.self_attn.v_proj.qzeros', 'model.decoder.layers.10.self_attn.v_proj.scales', 'model.decoder.layers.11.fc1.qweight', 'model.decoder.layers.11.fc1.qzeros', 'model.decoder.layers.11.fc1.scales', 'model.decoder.layers.11.fc2.qweight', 'model.decoder.layers.11.fc2.qzeros', 'model.decoder.layers.11.fc2.scales', 'model.decoder.layers.11.self_attn.k_proj.qweight', 'model.decoder.layers.11.self_attn.k_proj.qzeros', 'model.decoder.layers.11.self_attn.k_proj.scales', 'model.decoder.layers.11.self_attn.out_proj.qweight', 'model.decoder.layers.11.self_attn.out_proj.qzeros', 'model.decoder.layers.11.self_attn.out_proj.scales', 'model.decoder.layers.11.self_attn.q_proj.qweight', 'model.decoder.layers.11.self_attn.q_proj.qzeros', 'model.decoder.layers.11.self_attn.q_proj.scales', 'model.decoder.layers.11.self_attn.v_proj.qweight', 'model.decoder.layers.11.self_attn.v_proj.qzeros', 'model.decoder.layers.11.self_attn.v_proj.scales', 'model.decoder.layers.12.fc1.qweight', 'model.decoder.layers.12.fc1.qzeros', 'model.decoder.layers.12.fc1.scales', 'model.decoder.layers.12.fc2.qweight', 'model.decoder.layers.12.fc2.qzeros', 'model.decoder.layers.12.fc2.scales', 'model.decoder.layers.12.self_attn.k_proj.qweight', 'model.decoder.layers.12.self_attn.k_proj.qzeros', 'model.decoder.layers.12.self_attn.k_proj.scales', 'model.decoder.layers.12.self_attn.out_proj.qweight', 
'model.decoder.layers.12.self_attn.out_proj.qzeros', 'model.decoder.layers.12.self_attn.out_proj.scales', 'model.decoder.layers.12.self_attn.q_proj.qweight', 'model.decoder.layers.12.self_attn.q_proj.qzeros', 'model.decoder.layers.12.self_attn.q_proj.scales', 'model.decoder.layers.12.self_attn.v_proj.qweight', 'model.decoder.layers.12.self_attn.v_proj.qzeros', 'model.decoder.layers.12.self_attn.v_proj.scales', 'model.decoder.layers.13.fc1.qweight', 'model.decoder.layers.13.fc1.qzeros', 'model.decoder.layers.13.fc1.scales', 'model.decoder.layers.13.fc2.qweight', 'model.decoder.layers.13.fc2.qzeros', 'model.decoder.layers.13.fc2.scales', 'model.decoder.layers.13.self_attn.k_proj.qweight', 'model.decoder.layers.13.self_attn.k_proj.qzeros', 'model.decoder.layers.13.self_attn.k_proj.scales', 'model.decoder.layers.13.self_attn.out_proj.qweight', 'model.decoder.layers.13.self_attn.out_proj.qzeros', 'model.decoder.layers.13.self_attn.out_proj.scales', 'model.decoder.layers.13.self_attn.q_proj.qweight', 'model.decoder.layers.13.self_attn.q_proj.qzeros', 'model.decoder.layers.13.self_attn.q_proj.scales', 'model.decoder.layers.13.self_attn.v_proj.qweight', 'model.decoder.layers.13.self_attn.v_proj.qzeros', 'model.decoder.layers.13.self_attn.v_proj.scales', 'model.decoder.layers.14.fc1.qweight', 'model.decoder.layers.14.fc1.qzeros', 'model.decoder.layers.14.fc1.scales', 'model.decoder.layers.14.fc2.qweight', 'model.decoder.layers.14.fc2.qzeros', 'model.decoder.layers.14.fc2.scales', 'model.decoder.layers.14.self_attn.k_proj.qweight', 'model.decoder.layers.14.self_attn.k_proj.qzeros', 'model.decoder.layers.14.self_attn.k_proj.scales', 'model.decoder.layers.14.self_attn.out_proj.qweight', 'model.decoder.layers.14.self_attn.out_proj.qzeros', 'model.decoder.layers.14.self_attn.out_proj.scales', 'model.decoder.layers.14.self_attn.q_proj.qweight', 'model.decoder.layers.14.self_attn.q_proj.qzeros', 'model.decoder.layers.14.self_attn.q_proj.scales', 
'model.decoder.layers.14.self_attn.v_proj.qweight', 'model.decoder.layers.14.self_attn.v_proj.qzeros', 'model.decoder.layers.14.self_attn.v_proj.scales', 'model.decoder.layers.15.fc1.qweight', 'model.decoder.layers.15.fc1.qzeros', 'model.decoder.layers.15.fc1.scales', 'model.decoder.layers.15.fc2.qweight', 'model.decoder.layers.15.fc2.qzeros', 'model.decoder.layers.15.fc2.scales', 'model.decoder.layers.15.self_attn.k_proj.qweight', 'model.decoder.layers.15.self_attn.k_proj.qzeros', 'model.decoder.layers.15.self_attn.k_proj.scales', 'model.decoder.layers.15.self_attn.out_proj.qweight', 'model.decoder.layers.15.self_attn.out_proj.qzeros', 'model.decoder.layers.15.self_attn.out_proj.scales', 'model.decoder.layers.15.self_attn.q_proj.qweight', 'model.decoder.layers.15.self_attn.q_proj.qzeros', 'model.decoder.layers.15.self_attn.q_proj.scales', 'model.decoder.layers.15.self_attn.v_proj.qweight', 'model.decoder.layers.15.self_attn.v_proj.qzeros', 'model.decoder.layers.15.self_attn.v_proj.scales', 'model.decoder.layers.16.fc1.qweight', 'model.decoder.layers.16.fc1.qzeros', 'model.decoder.layers.16.fc1.scales', 'model.decoder.layers.16.fc2.qweight', 'model.decoder.layers.16.fc2.qzeros', 'model.decoder.layers.16.fc2.scales', 'model.decoder.layers.16.self_attn.k_proj.qweight', 'model.decoder.layers.16.self_attn.k_proj.qzeros', 'model.decoder.layers.16.self_attn.k_proj.scales', 'model.decoder.layers.16.self_attn.out_proj.qweight', 'model.decoder.layers.16.self_attn.out_proj.qzeros', 'model.decoder.layers.16.self_attn.out_proj.scales', 'model.decoder.layers.16.self_attn.q_proj.qweight', 'model.decoder.layers.16.self_attn.q_proj.qzeros', 'model.decoder.layers.16.self_attn.q_proj.scales', 'model.decoder.layers.16.self_attn.v_proj.qweight', 'model.decoder.layers.16.self_attn.v_proj.qzeros', 'model.decoder.layers.16.self_attn.v_proj.scales', 'model.decoder.layers.17.fc1.qweight', 'model.decoder.layers.17.fc1.qzeros', 'model.decoder.layers.17.fc1.scales', 
'model.decoder.layers.17.fc2.qweight', 'model.decoder.layers.17.fc2.qzeros', 'model.decoder.layers.17.fc2.scales', 'model.decoder.layers.17.self_attn.k_proj.qweight', 'model.decoder.layers.17.self_attn.k_proj.qzeros', 'model.decoder.layers.17.self_attn.k_proj.scales', 'model.decoder.layers.17.self_attn.out_proj.qweight', 'model.decoder.layers.17.self_attn.out_proj.qzeros', 'model.decoder.layers.17.self_attn.out_proj.scales', 'model.decoder.layers.17.self_attn.q_proj.qweight', 'model.decoder.layers.17.self_attn.q_proj.qzeros', 'model.decoder.layers.17.self_attn.q_proj.scales', 'model.decoder.layers.17.self_attn.v_proj.qweight', 'model.decoder.layers.17.self_attn.v_proj.qzeros', 'model.decoder.layers.17.self_attn.v_proj.scales', 'model.decoder.layers.18.fc1.qweight', 'model.decoder.layers.18.fc1.qzeros', 'model.decoder.layers.18.fc1.scales', 'model.decoder.layers.18.fc2.qweight', 'model.decoder.layers.18.fc2.qzeros', 'model.decoder.layers.18.fc2.scales', 'model.decoder.layers.18.self_attn.k_proj.qweight', 'model.decoder.layers.18.self_attn.k_proj.qzeros', 'model.decoder.layers.18.self_attn.k_proj.scales', 'model.decoder.layers.18.self_attn.out_proj.qweight', 'model.decoder.layers.18.self_attn.out_proj.qzeros', 'model.decoder.layers.18.self_attn.out_proj.scales', 'model.decoder.layers.18.self_attn.q_proj.qweight', 'model.decoder.layers.18.self_attn.q_proj.qzeros', 'model.decoder.layers.18.self_attn.q_proj.scales', 'model.decoder.layers.18.self_attn.v_proj.qweight', 'model.decoder.layers.18.self_attn.v_proj.qzeros', 'model.decoder.layers.18.self_attn.v_proj.scales', 'model.decoder.layers.19.fc1.qweight', 'model.decoder.layers.19.fc1.qzeros', 'model.decoder.layers.19.fc1.scales', 'model.decoder.layers.19.fc2.qweight', 'model.decoder.layers.19.fc2.qzeros', 'model.decoder.layers.19.fc2.scales', 'model.decoder.layers.19.self_attn.k_proj.qweight', 'model.decoder.layers.19.self_attn.k_proj.qzeros', 'model.decoder.layers.19.self_attn.k_proj.scales', 
'model.decoder.layers.19.self_attn.out_proj.qweight', 'model.decoder.layers.19.self_attn.out_proj.qzeros', 'model.decoder.layers.19.self_attn.out_proj.scales', 'model.decoder.layers.19.self_attn.q_proj.qweight', 'model.decoder.layers.19.self_attn.q_proj.qzeros', 'model.decoder.layers.19.self_attn.q_proj.scales', 'model.decoder.layers.19.self_attn.v_proj.qweight', 'model.decoder.layers.19.self_attn.v_proj.qzeros', 'model.decoder.layers.19.self_attn.v_proj.scales', 'model.decoder.layers.2.fc1.qweight', 'model.decoder.layers.2.fc1.qzeros', 'model.decoder.layers.2.fc1.scales', 'model.decoder.layers.2.fc2.qweight', 'model.decoder.layers.2.fc2.qzeros', 'model.decoder.layers.2.fc2.scales', 'model.decoder.layers.2.self_attn.k_proj.qweight', 'model.decoder.layers.2.self_attn.k_proj.qzeros', 'model.decoder.layers.2.self_attn.k_proj.scales', 'model.decoder.layers.2.self_attn.out_proj.qweight', 'model.decoder.layers.2.self_attn.out_proj.qzeros', 'model.decoder.layers.2.self_attn.out_proj.scales', 'model.decoder.layers.2.self_attn.q_proj.qweight', 'model.decoder.layers.2.self_attn.q_proj.qzeros', 'model.decoder.layers.2.self_attn.q_proj.scales', 'model.decoder.layers.2.self_attn.v_proj.qweight', 'model.decoder.layers.2.self_attn.v_proj.qzeros', 'model.decoder.layers.2.self_attn.v_proj.scales', 'model.decoder.layers.20.fc1.qweight', 'model.decoder.layers.20.fc1.qzeros', 'model.decoder.layers.20.fc1.scales', 'model.decoder.layers.20.fc2.qweight', 'model.decoder.layers.20.fc2.qzeros', 'model.decoder.layers.20.fc2.scales', 'model.decoder.layers.20.self_attn.k_proj.qweight', 'model.decoder.layers.20.self_attn.k_proj.qzeros', 'model.decoder.layers.20.self_attn.k_proj.scales', 'model.decoder.layers.20.self_attn.out_proj.qweight', 'model.decoder.layers.20.self_attn.out_proj.qzeros', 'model.decoder.layers.20.self_attn.out_proj.scales', 'model.decoder.layers.20.self_attn.q_proj.qweight', 'model.decoder.layers.20.self_attn.q_proj.qzeros', 'model.decoder.layers.20.self_attn.q_proj.scales', 
'model.decoder.layers.20.self_attn.v_proj.qweight', 'model.decoder.layers.20.self_attn.v_proj.qzeros', 'model.decoder.layers.20.self_attn.v_proj.scales', 'model.decoder.layers.21.fc1.qweight', 'model.decoder.layers.21.fc1.qzeros', 'model.decoder.layers.21.fc1.scales', 'model.decoder.layers.21.fc2.qweight', 'model.decoder.layers.21.fc2.qzeros', 'model.decoder.layers.21.fc2.scales', 'model.decoder.layers.21.self_attn.k_proj.qweight', 'model.decoder.layers.21.self_attn.k_proj.qzeros', 'model.decoder.layers.21.self_attn.k_proj.scales', 'model.decoder.layers.21.self_attn.out_proj.qweight', 'model.decoder.layers.21.self_attn.out_proj.qzeros', 'model.decoder.layers.21.self_attn.out_proj.scales', 'model.decoder.layers.21.self_attn.q_proj.qweight', 'model.decoder.layers.21.self_attn.q_proj.qzeros', 'model.decoder.layers.21.self_attn.q_proj.scales', 'model.decoder.layers.21.self_attn.v_proj.qweight', 'model.decoder.layers.21.self_attn.v_proj.qzeros', 'model.decoder.layers.21.self_attn.v_proj.scales', 'model.decoder.layers.22.fc1.qweight', 'model.decoder.layers.22.fc1.qzeros', 'model.decoder.layers.22.fc1.scales', 'model.decoder.layers.22.fc2.qweight', 'model.decoder.layers.22.fc2.qzeros', 'model.decoder.layers.22.fc2.scales', 'model.decoder.layers.22.self_attn.k_proj.qweight', 'model.decoder.layers.22.self_attn.k_proj.qzeros', 'model.decoder.layers.22.self_attn.k_proj.scales', 'model.decoder.layers.22.self_attn.out_proj.qweight', 'model.decoder.layers.22.self_attn.out_proj.qzeros', 'model.decoder.layers.22.self_attn.out_proj.scales', 'model.decoder.layers.22.self_attn.q_proj.qweight', 'model.decoder.layers.22.self_attn.q_proj.qzeros', 'model.decoder.layers.22.self_attn.q_proj.scales', 'model.decoder.layers.22.self_attn.v_proj.qweight', 'model.decoder.layers.22.self_attn.v_proj.qzeros', 'model.decoder.layers.22.self_attn.v_proj.scales', 'model.decoder.layers.23.fc1.qweight', 'model.decoder.layers.23.fc1.qzeros', 'model.decoder.layers.23.fc1.scales', 
'model.decoder.layers.23.fc2.qweight', 'model.decoder.layers.23.fc2.qzeros', 'model.decoder.layers.23.fc2.scales', 'model.decoder.layers.23.self_attn.k_proj.qweight', 'model.decoder.layers.23.self_attn.k_proj.qzeros', 'model.decoder.layers.23.self_attn.k_proj.scales', 'model.decoder.layers.23.self_attn.out_proj.qweight', 'model.decoder.layers.23.self_attn.out_proj.qzeros', 'model.decoder.layers.23.self_attn.out_proj.scales', 'model.decoder.layers.23.self_attn.q_proj.qweight', 'model.decoder.layers.23.self_attn.q_proj.qzeros', 'model.decoder.layers.23.self_attn.q_proj.scales', 'model.decoder.layers.23.self_attn.v_proj.qweight', 'model.decoder.layers.23.self_attn.v_proj.qzeros', 'model.decoder.layers.23.self_attn.v_proj.scales', 'model.decoder.layers.24.fc1.qweight', 'model.decoder.layers.24.fc1.qzeros', 'model.decoder.layers.24.fc1.scales', 'model.decoder.layers.24.fc2.qweight', 'model.decoder.layers.24.fc2.qzeros', 'model.decoder.layers.24.fc2.scales', 'model.decoder.layers.24.self_attn.k_proj.qweight', 'model.decoder.layers.24.self_attn.k_proj.qzeros', 'model.decoder.layers.24.self_attn.k_proj.scales', 'model.decoder.layers.24.self_attn.out_proj.qweight', 'model.decoder.layers.24.self_attn.out_proj.qzeros', 'model.decoder.layers.24.self_attn.out_proj.scales', 'model.decoder.layers.24.self_attn.q_proj.qweight', 'model.decoder.layers.24.self_attn.q_proj.qzeros', 'model.decoder.layers.24.self_attn.q_proj.scales', 'model.decoder.layers.24.self_attn.v_proj.qweight', 'model.decoder.layers.24.self_attn.v_proj.qzeros', 'model.decoder.layers.24.self_attn.v_proj.scales', 'model.decoder.layers.25.fc1.qweight', 'model.decoder.layers.25.fc1.qzeros', 'model.decoder.layers.25.fc1.scales', 'model.decoder.layers.25.fc2.qweight', 'model.decoder.layers.25.fc2.qzeros', 'model.decoder.layers.25.fc2.scales', 'model.decoder.layers.25.self_attn.k_proj.qweight', 'model.decoder.layers.25.self_attn.k_proj.qzeros', 'model.decoder.layers.25.self_attn.k_proj.scales', 
'model.decoder.layers.25.self_attn.out_proj.qweight', 'model.decoder.layers.25.self_attn.out_proj.qzeros', 'model.decoder.layers.25.self_attn.out_proj.scales', 'model.decoder.layers.25.self_attn.q_proj.qweight', 'model.decoder.layers.25.self_attn.q_proj.qzeros', 'model.decoder.layers.25.self_attn.q_proj.scales', 'model.decoder.layers.25.self_attn.v_proj.qweight', 'model.decoder.layers.25.self_attn.v_proj.qzeros', 'model.decoder.layers.25.self_attn.v_proj.scales', 'model.decoder.layers.26.fc1.qweight', 'model.decoder.layers.26.fc1.qzeros', 'model.decoder.layers.26.fc1.scales', 'model.decoder.layers.26.fc2.qweight', 'model.decoder.layers.26.fc2.qzeros', 'model.decoder.layers.26.fc2.scales', 'model.decoder.layers.26.self_attn.k_proj.qweight', 'model.decoder.layers.26.self_attn.k_proj.qzeros', 'model.decoder.layers.26.self_attn.k_proj.scales', 'model.decoder.layers.26.self_attn.out_proj.qweight', 'model.decoder.layers.26.self_attn.out_proj.qzeros', 'model.decoder.layers.26.self_attn.out_proj.scales', 'model.decoder.layers.26.self_attn.q_proj.qweight', 'model.decoder.layers.26.self_attn.q_proj.qzeros', 'model.decoder.layers.26.self_attn.q_proj.scales', 'model.decoder.layers.26.self_attn.v_proj.qweight', 'model.decoder.layers.26.self_attn.v_proj.qzeros', 'model.decoder.layers.26.self_attn.v_proj.scales', 'model.decoder.layers.27.fc1.qweight', 'model.decoder.layers.27.fc1.qzeros', 'model.decoder.layers.27.fc1.scales', 'model.decoder.layers.27.fc2.qweight', 'model.decoder.layers.27.fc2.qzeros', 'model.decoder.layers.27.fc2.scales', 'model.decoder.layers.27.self_attn.k_proj.qweight', 'model.decoder.layers.27.self_attn.k_proj.qzeros', 'model.decoder.layers.27.self_attn.k_proj.scales', 'model.decoder.layers.27.self_attn.out_proj.qweight', 'model.decoder.layers.27.self_attn.out_proj.qzeros', 'model.decoder.layers.27.self_attn.out_proj.scales', 'model.decoder.layers.27.self_attn.q_proj.qweight', 'model.decoder.layers.27.self_attn.q_proj.qzeros', 
'model.decoder.layers.27.self_attn.q_proj.scales', 'model.decoder.layers.27.self_attn.v_proj.qweight', 'model.decoder.layers.27.self_attn.v_proj.qzeros', 'model.decoder.layers.27.self_attn.v_proj.scales', 'model.decoder.layers.28.fc1.qweight', 'model.decoder.layers.28.fc1.qzeros', 'model.decoder.layers.28.fc1.scales', 'model.decoder.layers.28.fc2.qweight', 'model.decoder.layers.28.fc2.qzeros', 'model.decoder.layers.28.fc2.scales', 'model.decoder.layers.28.self_attn.k_proj.qweight', 'model.decoder.layers.28.self_attn.k_proj.qzeros', 'model.decoder.layers.28.self_attn.k_proj.scales', 'model.decoder.layers.28.self_attn.out_proj.qweight', 'model.decoder.layers.28.self_attn.out_proj.qzeros', 'model.decoder.layers.28.self_attn.out_proj.scales', 'model.decoder.layers.28.self_attn.q_proj.qweight', 'model.decoder.layers.28.self_attn.q_proj.qzeros', 'model.decoder.layers.28.self_attn.q_proj.scales', 'model.decoder.layers.28.self_attn.v_proj.qweight', 'model.decoder.layers.28.self_attn.v_proj.qzeros', 'model.decoder.layers.28.self_attn.v_proj.scales', 'model.decoder.layers.29.fc1.qweight', 'model.decoder.layers.29.fc1.qzeros', 'model.decoder.layers.29.fc1.scales', 'model.decoder.layers.29.fc2.qweight', 'model.decoder.layers.29.fc2.qzeros', 'model.decoder.layers.29.fc2.scales', 'model.decoder.layers.29.self_attn.k_proj.qweight', 'model.decoder.layers.29.self_attn.k_proj.qzeros', 'model.decoder.layers.29.self_attn.k_proj.scales', 'model.decoder.layers.29.self_attn.out_proj.qweight', 'model.decoder.layers.29.self_attn.out_proj.qzeros', 'model.decoder.layers.29.self_attn.out_proj.scales', 'model.decoder.layers.29.self_attn.q_proj.qweight', 'model.decoder.layers.29.self_attn.q_proj.qzeros', 'model.decoder.layers.29.self_attn.q_proj.scales', 'model.decoder.layers.29.self_attn.v_proj.qweight', 'model.decoder.layers.29.self_attn.v_proj.qzeros', 'model.decoder.layers.29.self_attn.v_proj.scales', 'model.decoder.layers.3.fc1.qweight', 'model.decoder.layers.3.fc1.qzeros', 
'model.decoder.layers.3.fc1.scales', 'model.decoder.layers.3.fc2.qweight', 'model.decoder.layers.3.fc2.qzeros', 'model.decoder.layers.3.fc2.scales', 'model.decoder.layers.3.self_attn.k_proj.qweight', 'model.decoder.layers.3.self_attn.k_proj.qzeros', 'model.decoder.layers.3.self_attn.k_proj.scales', 'model.decoder.layers.3.self_attn.out_proj.qweight', 'model.decoder.layers.3.self_attn.out_proj.qzeros', 'model.decoder.layers.3.self_attn.out_proj.scales', 'model.decoder.layers.3.self_attn.q_proj.qweight', 'model.decoder.layers.3.self_attn.q_proj.qzeros', 'model.decoder.layers.3.self_attn.q_proj.scales', 'model.decoder.layers.3.self_attn.v_proj.qweight', 'model.decoder.layers.3.self_attn.v_proj.qzeros', 'model.decoder.layers.3.self_attn.v_proj.scales', 'model.decoder.layers.30.fc1.qweight', 'model.decoder.layers.30.fc1.qzeros', 'model.decoder.layers.30.fc1.scales', 'model.decoder.layers.30.fc2.qweight', 'model.decoder.layers.30.fc2.qzeros', 'model.decoder.layers.30.fc2.scales', 'model.decoder.layers.30.self_attn.k_proj.qweight', 'model.decoder.layers.30.self_attn.k_proj.qzeros', 'model.decoder.layers.30.self_attn.k_proj.scales', 'model.decoder.layers.30.self_attn.out_proj.qweight', 'model.decoder.layers.30.self_attn.out_proj.qzeros', 'model.decoder.layers.30.self_attn.out_proj.scales', 'model.decoder.layers.30.self_attn.q_proj.qweight', 'model.decoder.layers.30.self_attn.q_proj.qzeros', 'model.decoder.layers.30.self_attn.q_proj.scales', 'model.decoder.layers.30.self_attn.v_proj.qweight', 'model.decoder.layers.30.self_attn.v_proj.qzeros', 'model.decoder.layers.30.self_attn.v_proj.scales', 'model.decoder.layers.31.fc1.qweight', 'model.decoder.layers.31.fc1.qzeros', 'model.decoder.layers.31.fc1.scales', 'model.decoder.layers.31.fc2.qweight', 'model.decoder.layers.31.fc2.qzeros', 'model.decoder.layers.31.fc2.scales', 'model.decoder.layers.31.self_attn.k_proj.qweight', 'model.decoder.layers.31.self_attn.k_proj.qzeros', 'model.decoder.layers.31.self_attn.k_proj.scales', 
'model.decoder.layers.31.self_attn.out_proj.qweight', 'model.decoder.layers.31.self_attn.out_proj.qzeros', 'model.decoder.layers.31.self_attn.out_proj.scales', 'model.decoder.layers.31.self_attn.q_proj.qweight', 'model.decoder.layers.31.self_attn.q_proj.qzeros', 'model.decoder.layers.31.self_attn.q_proj.scales', 'model.decoder.layers.31.self_attn.v_proj.qweight', 'model.decoder.layers.31.self_attn.v_proj.qzeros', 'model.decoder.layers.31.self_attn.v_proj.scales', 'model.decoder.layers.4.fc1.qweight', 'model.decoder.layers.4.fc1.qzeros', 'model.decoder.layers.4.fc1.scales', 'model.decoder.layers.4.fc2.qweight', 'model.decoder.layers.4.fc2.qzeros', 'model.decoder.layers.4.fc2.scales', 'model.decoder.layers.4.self_attn.k_proj.qweight', 'model.decoder.layers.4.self_attn.k_proj.qzeros', 'model.decoder.layers.4.self_attn.k_proj.scales', 'model.decoder.layers.4.self_attn.out_proj.qweight', 'model.decoder.layers.4.self_attn.out_proj.qzeros', 'model.decoder.layers.4.self_attn.out_proj.scales', 'model.decoder.layers.4.self_attn.q_proj.qweight', 'model.decoder.layers.4.self_attn.q_proj.qzeros', 'model.decoder.layers.4.self_attn.q_proj.scales', 'model.decoder.layers.4.self_attn.v_proj.qweight', 'model.decoder.layers.4.self_attn.v_proj.qzeros', 'model.decoder.layers.4.self_attn.v_proj.scales', 'model.decoder.layers.5.fc1.qweight', 'model.decoder.layers.5.fc1.qzeros', 'model.decoder.layers.5.fc1.scales', 'model.decoder.layers.5.fc2.qweight', 'model.decoder.layers.5.fc2.qzeros', 'model.decoder.layers.5.fc2.scales', 'model.decoder.layers.5.self_attn.k_proj.qweight', 'model.decoder.layers.5.self_attn.k_proj.qzeros', 'model.decoder.layers.5.self_attn.k_proj.scales', 'model.decoder.layers.5.self_attn.out_proj.qweight', 'model.decoder.layers.5.self_attn.out_proj.qzeros', 'model.decoder.layers.5.self_attn.out_proj.scales', 'model.decoder.layers.5.self_attn.q_proj.qweight', 'model.decoder.layers.5.self_attn.q_proj.qzeros', 'model.decoder.layers.5.self_attn.q_proj.scales', 
'model.decoder.layers.5.self_attn.v_proj.qweight', 'model.decoder.layers.5.self_attn.v_proj.qzeros', 'model.decoder.layers.5.self_attn.v_proj.scales', 'model.decoder.layers.6.fc1.qweight', 'model.decoder.layers.6.fc1.qzeros', 'model.decoder.layers.6.fc1.scales', 'model.decoder.layers.6.fc2.qweight', 'model.decoder.layers.6.fc2.qzeros', 'model.decoder.layers.6.fc2.scales', 'model.decoder.layers.6.self_attn.k_proj.qweight', 'model.decoder.layers.6.self_attn.k_proj.qzeros', 'model.decoder.layers.6.self_attn.k_proj.scales', 'model.decoder.layers.6.self_attn.out_proj.qweight', 'model.decoder.layers.6.self_attn.out_proj.qzeros', 'model.decoder.layers.6.self_attn.out_proj.scales', 'model.decoder.layers.6.self_attn.q_proj.qweight', 'model.decoder.layers.6.self_attn.q_proj.qzeros', 'model.decoder.layers.6.self_attn.q_proj.scales', 'model.decoder.layers.6.self_attn.v_proj.qweight', 'model.decoder.layers.6.self_attn.v_proj.qzeros', 'model.decoder.layers.6.self_attn.v_proj.scales', 'model.decoder.layers.7.fc1.qweight', 'model.decoder.layers.7.fc1.qzeros', 'model.decoder.layers.7.fc1.scales', 'model.decoder.layers.7.fc2.qweight', 'model.decoder.layers.7.fc2.qzeros', 'model.decoder.layers.7.fc2.scales', 'model.decoder.layers.7.self_attn.k_proj.qweight', 'model.decoder.layers.7.self_attn.k_proj.qzeros', 'model.decoder.layers.7.self_attn.k_proj.scales', 'model.decoder.layers.7.self_attn.out_proj.qweight', 'model.decoder.layers.7.self_attn.out_proj.qzeros', 'model.decoder.layers.7.self_attn.out_proj.scales', 'model.decoder.layers.7.self_attn.q_proj.qweight', 'model.decoder.layers.7.self_attn.q_proj.qzeros', 'model.decoder.layers.7.self_attn.q_proj.scales', 'model.decoder.layers.7.self_attn.v_proj.qweight', 'model.decoder.layers.7.self_attn.v_proj.qzeros', 'model.decoder.layers.7.self_attn.v_proj.scales', 'model.decoder.layers.8.fc1.qweight', 'model.decoder.layers.8.fc1.qzeros', 'model.decoder.layers.8.fc1.scales', 'model.decoder.layers.8.fc2.qweight', 
'model.decoder.layers.8.fc2.qzeros', 'model.decoder.layers.8.fc2.scales', 'model.decoder.layers.8.self_attn.k_proj.qweight', 'model.decoder.layers.8.self_attn.k_proj.qzeros', 'model.decoder.layers.8.self_attn.k_proj.scales', 'model.decoder.layers.8.self_attn.out_proj.qweight', 'model.decoder.layers.8.self_attn.out_proj.qzeros', 'model.decoder.layers.8.self_attn.out_proj.scales', 'model.decoder.layers.8.self_attn.q_proj.qweight', 'model.decoder.layers.8.self_attn.q_proj.qzeros', 'model.decoder.layers.8.self_attn.q_proj.scales', 'model.decoder.layers.8.self_attn.v_proj.qweight', 'model.decoder.layers.8.self_attn.v_proj.qzeros', 'model.decoder.layers.8.self_attn.v_proj.scales', 'model.decoder.layers.9.fc1.qweight', 'model.decoder.layers.9.fc1.qzeros', 'model.decoder.layers.9.fc1.scales', 'model.decoder.layers.9.fc2.qweight', 'model.decoder.layers.9.fc2.qzeros', 'model.decoder.layers.9.fc2.scales', 'model.decoder.layers.9.self_attn.k_proj.qweight', 'model.decoder.layers.9.self_attn.k_proj.qzeros', 'model.decoder.layers.9.self_attn.k_proj.scales', 'model.decoder.layers.9.self_attn.out_proj.qweight', 'model.decoder.layers.9.self_attn.out_proj.qzeros', 'model.decoder.layers.9.self_attn.out_proj.scales', 'model.decoder.layers.9.self_attn.q_proj.qweight', 'model.decoder.layers.9.self_attn.q_proj.qzeros', 'model.decoder.layers.9.self_attn.q_proj.scales', 'model.decoder.layers.9.self_attn.v_proj.qweight', 'model.decoder.layers.9.self_attn.v_proj.qzeros', 'model.decoder.layers.9.self_attn.v_proj.scales']\n",
      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
     ]
    }
   ],
   "source": [
    "# Reload the quantized checkpoint with AutoAWQForCausalLM.from_quantized\n",
    "# (imported in the first cell). A plain AutoModelForCausalLM cannot interpret\n",
    "# the packed AWQ tensors (qweight/qzeros/scales): every layer stayed randomly\n",
    "# initialized (see the huge missing-weights warning above), which later makes\n",
    "# generate() produce inf/nan probabilities.\n",
    "# device_map=\"cuda\" already places the model on GPU, so no extra .to(0) is needed.\n",
    "tokenizer = AutoTokenizer.from_pretrained(quan_model_dir)\n",
    "model = AutoAWQForCausalLM.from_quantized(quan_model_dir, device_map=\"cuda\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "52cc065b-01cd-46b7-a0ab-22d174d1cf20",
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_text(text, max_new_tokens=64):\n",
    "    \"\"\"Generate a continuation of `text` with the notebook-global model/tokenizer.\n",
    "\n",
    "    Args:\n",
    "        text: Prompt string to continue.\n",
    "        max_new_tokens: Generation budget; default 64 keeps the original behavior.\n",
    "\n",
    "    Returns:\n",
    "        The full decoded sequence (prompt + continuation), special tokens stripped.\n",
    "    \"\"\"\n",
    "    # Move the encoded prompt to GPU 0, where the model was placed.\n",
    "    inputs = tokenizer(text, return_tensors=\"pt\").to(0)\n",
    "\n",
    "    out = model.generate(**inputs, max_new_tokens=max_new_tokens)\n",
    "    return tokenizer.decode(out[0], skip_special_tokens=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "cc9b1f11-b58e-49d5-a203-751e1da28dd1",
   "metadata": {},
   "outputs": [
    {
     "ename": "RuntimeError",
     "evalue": "probability tensor contains either `inf`, `nan` or element < 0",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mRuntimeError\u001b[0m                              Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[37], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mgenerate_text\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mMerry Christmas! I\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mm glad to\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m      2\u001b[0m \u001b[38;5;28mprint\u001b[39m(result)\n",
      "Cell \u001b[0;32mIn[36], line 4\u001b[0m, in \u001b[0;36mgenerate_text\u001b[0;34m(text)\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mgenerate_text\u001b[39m(text):\n\u001b[1;32m      2\u001b[0m     inputs \u001b[38;5;241m=\u001b[39m tokenizer(text, return_tensors\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpt\u001b[39m\u001b[38;5;124m\"\u001b[39m)\u001b[38;5;241m.\u001b[39mto(\u001b[38;5;241m0\u001b[39m)\n\u001b[0;32m----> 4\u001b[0m     out \u001b[38;5;241m=\u001b[39m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmax_new_tokens\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m64\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m      5\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m tokenizer\u001b[38;5;241m.\u001b[39mdecode(out[\u001b[38;5;241m0\u001b[39m], skip_special_tokens\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n",
      "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.10/site-packages/torch/utils/_contextlib.py:115\u001b[0m, in \u001b[0;36mcontext_decorator.<locals>.decorate_context\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    112\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m    113\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mdecorate_context\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m    114\u001b[0m     \u001b[38;5;28;01mwith\u001b[39;00m ctx_factory():\n\u001b[0;32m--> 115\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.10/site-packages/transformers/generation/utils.py:2252\u001b[0m, in \u001b[0;36mGenerationMixin.generate\u001b[0;34m(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, **kwargs)\u001b[0m\n\u001b[1;32m   2244\u001b[0m     input_ids, model_kwargs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_expand_inputs_for_generation(\n\u001b[1;32m   2245\u001b[0m         input_ids\u001b[38;5;241m=\u001b[39minput_ids,\n\u001b[1;32m   2246\u001b[0m         expand_size\u001b[38;5;241m=\u001b[39mgeneration_config\u001b[38;5;241m.\u001b[39mnum_return_sequences,\n\u001b[1;32m   2247\u001b[0m         is_encoder_decoder\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39mis_encoder_decoder,\n\u001b[1;32m   2248\u001b[0m         \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mmodel_kwargs,\n\u001b[1;32m   2249\u001b[0m     )\n\u001b[1;32m   2251\u001b[0m     \u001b[38;5;66;03m# 12. 
run sample (it degenerates to greedy search when `generation_config.do_sample=False`)\u001b[39;00m\n\u001b[0;32m-> 2252\u001b[0m     result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sample\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   2253\u001b[0m \u001b[43m        \u001b[49m\u001b[43minput_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2254\u001b[0m \u001b[43m        \u001b[49m\u001b[43mlogits_processor\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprepared_logits_processor\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2255\u001b[0m \u001b[43m        \u001b[49m\u001b[43mstopping_criteria\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprepared_stopping_criteria\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2256\u001b[0m \u001b[43m        \u001b[49m\u001b[43mgeneration_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgeneration_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2257\u001b[0m \u001b[43m        \u001b[49m\u001b[43msynced_gpus\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msynced_gpus\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2258\u001b[0m \u001b[43m        \u001b[49m\u001b[43mstreamer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstreamer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2259\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mmodel_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2260\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   2262\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m generation_mode \u001b[38;5;129;01min\u001b[39;00m (GenerationMode\u001b[38;5;241m.\u001b[39mBEAM_SAMPLE, GenerationMode\u001b[38;5;241m.\u001b[39mBEAM_SEARCH):\n\u001b[1;32m   2263\u001b[0m     \u001b[38;5;66;03m# 11. 
prepare beam search scorer\u001b[39;00m\n\u001b[1;32m   2264\u001b[0m     beam_scorer \u001b[38;5;241m=\u001b[39m BeamSearchScorer(\n\u001b[1;32m   2265\u001b[0m         batch_size\u001b[38;5;241m=\u001b[39mbatch_size,\n\u001b[1;32m   2266\u001b[0m         num_beams\u001b[38;5;241m=\u001b[39mgeneration_config\u001b[38;5;241m.\u001b[39mnum_beams,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m   2271\u001b[0m         max_length\u001b[38;5;241m=\u001b[39mgeneration_config\u001b[38;5;241m.\u001b[39mmax_length,\n\u001b[1;32m   2272\u001b[0m     )\n",
      "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.10/site-packages/transformers/generation/utils.py:3297\u001b[0m, in \u001b[0;36mGenerationMixin._sample\u001b[0;34m(self, input_ids, logits_processor, stopping_criteria, generation_config, synced_gpus, streamer, **model_kwargs)\u001b[0m\n\u001b[1;32m   3295\u001b[0m     probs \u001b[38;5;241m=\u001b[39m nn\u001b[38;5;241m.\u001b[39mfunctional\u001b[38;5;241m.\u001b[39msoftmax(next_token_scores, dim\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m   3296\u001b[0m     \u001b[38;5;66;03m# TODO (joao): this OP throws \"skipping cudagraphs due to ['incompatible ops']\", find solution\u001b[39;00m\n\u001b[0;32m-> 3297\u001b[0m     next_tokens \u001b[38;5;241m=\u001b[39m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmultinomial\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprobs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnum_samples\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39msqueeze(\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m   3298\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m   3299\u001b[0m     next_tokens \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39margmax(next_token_scores, dim\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\n",
      "\u001b[0;31mRuntimeError\u001b[0m: probability tensor contains either `inf`, `nan` or element < 0"
     ]
    }
   ],
   "source": [
    "# Smoke-test generation; requires the quantized model to have loaded its\n",
    "# weights correctly (the RuntimeError captured above came from a model whose\n",
    "# weights were never actually loaded).\n",
    "result = generate_text(\"Merry Christmas! I'm glad to\")\n",
    "print(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "331046e8-a689-4f65-a3a7-1a95b2d75930",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
